#!/bin/bash

source $LKP_SRC/lib/log.sh
source $LKP_SRC/lib/lkp_path.sh

# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# Positional arguments:
#   $1 - program: name of the stats script to run
#   $2 - stats_group: name the stats are stored under (defaults to program)
program=$1
stats_group=${2:-$program}

# the stats scripts are looked up in the directory holding this script
program_dir=$(dirname $0)

# raw output of the program inside the result directory
log=${RESULT_ROOT}/${stats_group}

# directory of the lookup files (default_stats.yaml, failure, pass)
LKP_SRC_ETC="$(lkp_src)/etc"
# Stop processing the stats when the program is not in the program_list.
# program_list is a file that records all the programs (setups, monitors,
# tests, daemons) executed during the test.
# This solves a problem for cluster test jobs running on the server node: no
# log file is generated there for the running server daemons, but due to the
# current Job2sh algorithm the stats processing for the programs running on
# the client node would also be handled on the server node.
#
# Default stats are not in the program_list, so they bypass the check.
# ':' is matched literally without a backslash; "\:" is an unspecified escape
# that recent GNU grep releases warn about on stderr.
if ! grep -q "^$program:" "$LKP_SRC_ETC/default_stats.yaml"; then
	if [ -f "$RESULT_ROOT/program_list" ]; then
		grep -q "$program" "$RESULT_ROOT/program_list" || exit 0
	fi
fi

# Flag the run as incomplete when a test program left no output behind.
# Globals:   program, LKP_SRC, RESULT_ROOT (all read)
# Arguments: $1 - file expected to hold output (raw log or generated stats)
# Outputs:   appends a "# missing ..." comment line and "is_incomplete_run: 1"
#            to $RESULT_ROOT/last_state
# Nothing is recorded when the file is non-empty, or when $program has no
# file under $LKP_SRC/tests.
check_incomplete_run()
{
	local file=$1

	[[ -s $file ]] && return
	[[ -f $LKP_SRC/tests/$program ]] || return

	# printf keeps backslashes in $program/$file literal (echo -e would
	# interpret them as escapes); the quoted redirect target keeps working
	# when RESULT_ROOT contains whitespace
	printf '# missing %s %s\nis_incomplete_run: 1\n' "$program" "$file" >> "$RESULT_ROOT/last_state"
}

# Record a soft timeout in last_state, once.
# Globals: RESULT_ROOT (read)
# Does nothing unless the marker file $RESULT_ROOT/soft_timeout exists.
# Otherwise makes sure $RESULT_ROOT/last_state contains the exact line
# "soft_timeout: 1", appending it when it is not there yet.
check_soft_timeout()
{
	local marker=$RESULT_ROOT/soft_timeout
	local last_state=$RESULT_ROOT/last_state

	[[ -f $marker ]] || return

	# last_state may not exist yet; only grep it when it does, so no
	# "No such file or directory" noise ends up on stderr
	if [[ -f $last_state ]] && grep -xq "soft_timeout: 1" "$last_state"; then
		return 0
	fi

	echo "soft_timeout: 1" >> "$last_state"
}

# Report whether last_state records an exit code 99.
# Returns 1 (after logging a warning) when $RESULT_ROOT/last_state has a line
# matching "exit_code.99: 1", 0 otherwise or when there is no last_state.
check_die()
{
	local state_file=$RESULT_ROOT/last_state

	if [[ ! -f $state_file ]]; then
		return 0
	fi

	# example of a matching line: test.locktorture.exit_code.99: 1
	if grep -q "exit_code.99: 1" $state_file; then
		log_warn "job exited due to die" "$RESULT_ROOT"
		return 1
	fi

	return 0
}

# Report whether last_state records a wget_initrd_fail.
# Returns 1 (after logging a warning) when $RESULT_ROOT/last_state contains
# "wget_initrd_fail: 1", 0 otherwise or when there is no last_state.
check_env_issue()
{
	local state_file=$RESULT_ROOT/last_state

	[[ -f $state_file ]] || return 0

	# wget_initrd_fail is mostly caused by an env issue, not by the kernel,
	# so we better mv the rt to bad.
	grep -q "wget_initrd_fail: 1" $state_file || return 0

	log_warn "job exited due to wget_initrd_fail" "$RESULT_ROOT"
	return 1
}

# End the script (exit 0) when the program's log is empty or missing, after
# removing $log and $log.gz.
# Returns to the caller instead when the log has content, for kernel-size and
# perf-profile, and for dmesg/kmsg when the other one of the pair has content.
check_empty_output()
{
	if [[ -s $log ]]; then
		return 0
	fi

	case $program in
	kernel-size|perf-profile)
		return 0
		;;
	dmesg)
		[[ -s $RESULT_ROOT/kmsg ]] && return 0
		;;
	kmsg)
		[[ -s $RESULT_ROOT/dmesg ]] && return 0
		;;
	esac

	rm -f $log $log.gz
	exit 0
}

# End the script (exit 0) with a warning when the log contains a NUL byte.
# The check is skipped for dmesg, kmsg, mpstat and iostat. Returns non-zero
# to the caller when the log does not exist or no NUL byte is found.
check_binary_output()
{
	local rc

	case $program in
	dmesg|kmsg|mpstat|iostat)
		return 0
		;;
	esac

	# kmsg may actually read the dmesg file
	# refer to the exception cases in check_empty_output()
	[[ -e $log ]] || return

	# hand grep's status back to the caller when no NUL byte is found
	grep -Paq '\x0' $log
	rc=$?
	(( rc == 0 )) || return $rc

	log_warn "$program: skip binary file" "$log"
	exit 0
}

# Log a warning when the stats script produced an empty $tmpfile.
# No warning is logged when the testbox name starts with "vm-", for the
# dmesg, ftrace, turbostat and perf-profile programs, when $RESULT_ROOT/time
# is empty or missing, when last_state has content, or when the program has
# an entry starting with "<program>." in etc/failure or etc/pass.
warn_empty_stats()
{
	local list

	[[ -s $tmpfile ]] && return
	[[ $testbox = vm-* ]] && return

	case $program in
	dmesg|ftrace|turbostat|perf-profile)
		return 0
		;;
	esac

	[[ -s $RESULT_ROOT/time ]] || return
	[[ -s $RESULT_ROOT/last_state ]] && return

	for list in failure pass; do
		grep -q "^$program\." $LKP_SRC_ETC/$list && return
	done

	log_warn "$program: empty stats" "$log"
}

# Print the test case name: the third "/"-separated field of $RESULT_ROOT
# with a leading "kvm:" removed.
get_testcase()
{
	local third_field

	third_field=$(echo "$RESULT_ROOT" | cut -d "/" -f 3)

	echo "${third_field#kvm:}"
}

# Flag the run as incomplete when the test's own stats group has no json file.
# Globals:  testcase, stats_group, RESULT_ROOT (all read)
# Returns:  1 when neither $stats_group.json nor $stats_group.json.gz exists
#           in $RESULT_ROOT for the stats group named in job.sh; 0 otherwise.
# Side effects: unless last_state already mentions is_incomplete_run, logs a
#           warning and appends "is_incomplete_run: 1" to last_state.
# The check is skipped for the 'borrow' and 'boot' test cases, when there is
# no job.sh, and for stats groups other than the one run through the tests
# wrapper in job.sh.
check_empty_json()
{
	[[ $testcase = 'borrow' ]] && return
	[[ $testcase = 'boot' ]] && return

	local job_script=$RESULT_ROOT/job.sh
	local last_state=$RESULT_ROOT/last_state
	local json=$RESULT_ROOT/$stats_group.json

	[[ -f $job_script ]] || return 0

	# testcase may be different from run_case
	# take the fio-basic-1hdd-write.yaml job as an example:
	# - testcase will be fio-basic
	# - run_case will be fio
	local run_case
	run_case=$(awk '/\$LKP_SRC\/tests\/wrapper/ {print $NF}' "$job_script")

	[[ "$run_case" = "$stats_group" ]] || return 0

	[[ -f $json || -f $json.gz ]] && return 0

	# already flagged as incomplete: do not report it a second time
	if [[ -f $last_state ]] && grep -q is_incomplete_run "$last_state"; then
		return 1
	fi

	log_warn "$stats_group: no json file generated" "$RESULT_ROOT"
	printf '# no json file for %s\nis_incomplete_run: 1\n' "$stats_group" >> "$last_state"
	return 1
}

# Unpack a compressed log into $log; an empty archive ends the script.
if [ -e $log.gz ]; then
	[ -s $log.gz ] || exit 0
	zcat $log.gz > $log
elif [ -e $log.xz ]; then
	[ -s $log.xz ] || exit 0
	xzcat $log.xz > $log
fi

kmsg_log=$RESULT_ROOT/kmsg
dmesg_log=$RESULT_ROOT/dmesg

case $log in
"$RESULT_ROOT"/boot-memory | "$RESULT_ROOT"/boot-time | "$RESULT_ROOT"/dmesg)
	# extract kmsg for kmsg related stats
	[ -f $kmsg_log.xz ] && xzcat $kmsg_log.xz > $kmsg_log
	;;
"$RESULT_ROOT"/kmsg)
	# extract dmesg for dmesg related stats
	[ -f $dmesg_log.xz ] && xzcat $dmesg_log.xz > $dmesg_log
	;;
esac

# Build $log from the dmesg file, or from the kmsg file when there is no
# dmesg, with the leading bracketed fields stripped from every line.
# Globals: dmesg_log, kmsg_log, log (all read)
generate_log_from_serial()
{
	local serial

	# unpack whichever compressed captures exist
	for serial in "$dmesg_log" "$kmsg_log"; do
		if [[ -f $serial.xz ]]; then
			xzcat "$serial.xz" > "$serial"
		fi
	done

	# dmesg is preferred; kmsg is only used when dmesg is absent
	for serial in "$dmesg_log" "$kmsg_log"; do
		[[ -f $serial ]] || continue

		# remove [xxx][xxx] at the beginning of each line, e.g. in
		# $RESULT_ROOT/dmesg.xz:
		# [    0.000000][    T0] signal: max sigframe size: 1776
		# [    0.000000][    T0] BIOS-provided physical RAM map:
		# [    0.000000][    T0] BIOS-e820: [mem 0x0000000000000100-0x000000000009ebff] usable
		perl -pe 's/^(\[.*?\])* *//g' < "$serial" > "$log"
		return
	done

	return 0
}

# test case name derived from the RESULT_ROOT path
testcase=$(get_testcase)

# second field of the "category" line(s) in job.yaml
category=$(grep "^category" "$RESULT_ROOT"/job.yaml | awk '{print $2}')

# The program is the functional test itself and it left no log file behind:
# rebuild $log from the dmesg/kmsg capture.
if [[ "$program" = "$testcase" ]] && [[ -f "$LKP_SRC/tests/$program" ]] && [[ ! -f $log ]] && [[ "$category" = functional ]]; then
	generate_log_from_serial
fi

# The order matters: check_empty_output and check_binary_output may end the
# script with exit 0 before check_die and check_env_issue get to run; the
# latter two turn a positive finding into exit 1.
check_incomplete_run $log
check_soft_timeout
check_empty_output
check_binary_output
check_die || exit 1
check_env_issue || exit 1

# When the stats script returns failure, we need to decide whether it is a
# bad rt:
# - the job exited due to die, so the rt is bad. This is handled in check_die.
# - the job has soft_timeout or OOM or panic, which may lead to incomplete or
#   missing output. The rt is not necessarily bad.
# - the job ran to the end without the serious states mentioned in the above
#   bullets; then any incomplete or missing output makes it a bad rt.
#
# Returns 1 when last_state records a soft_timeout or an OOM, 0 otherwise
# (including when there is no last_state).
is_bad_rt_candidate()
{
	local state_file=$RESULT_ROOT/last_state

	[[ -f $state_file ]] || return 0

	# rli9 FIXME the logic can't handle panic situation now
	if grep -q -e "soft_timeout: 1" -e "OOM: 1" $state_file; then
		return 1
	fi

	return 0
}

# scratch file receiving the stdout of the stats script
tmpfile=$(mktemp -p /tmp -t lkp-stats.XXXXXXXX)
# Run the stats script. When the log exists it is passed both as the first
# argument and on stdin; otherwise the script reads an empty stdin.
# On failure the error is logged together with the exit status ($? is still
# that of the stats script when the message is expanded) and the tmpfile
# path. The wrapper then exits 1 if is_bad_rt_candidate succeeds (tmpfile is
# not removed on that path); otherwise processing continues.
if [[ -f $log ]]; then
	$program_dir/$program $log < $log > $tmpfile || { log_error "failed to stat $program" "$program_dir/$program $log < $log\nerrno: $?\ntmpfile: $tmpfile"; is_bad_rt_candidate && exit 1; }
else
	$program_dir/$program < /dev/null > $tmpfile || { log_error "failed to stat $program" "$program_dir/$program < /dev/null\nerrno: $?\ntmpfile: $tmpfile"; is_bad_rt_candidate && exit 1; }
fi

# Programs with a "<program>." entry in etc/failure or etc/pass skip the
# incomplete-run check on the generated stats.
grep -q "^$program\." <(cat $LKP_SRC_ETC/failure $LKP_SRC_ETC/pass) ||
check_incomplete_run $tmpfile

warn_empty_stats

# Feed a stats file to $LKP_SRC/sbin/dump-stat and remove it afterwards.
# Globals:   LKP_SRC, program (read)
# Arguments: $1 - stats group name
#            $2 - stats file, passed to dump-stat on stdin
#            $3 - optional stat file, forwarded to dump-stat as its second
#                 argument
# On a dump-stat failure the error is logged and the script exits 1; the
# stats file is left in place in that case.
dump_stat()
{
	local stats_group=$1
	local file=$2
	local stat_file=$3
	local rc

	# forward stat_file only when it is set, so that dump-stat never
	# receives an empty argument; everything else is quoted to survive
	# whitespace in paths
	"$LKP_SRC/sbin/dump-stat" "$stats_group" ${stat_file:+"$stat_file"} < "$file" || {
		rc=$?
		log_error "failed to dump-stat $program" "errno: $rc\nfile: $file"
		exit 1
	}
	rm -- "$file"
}

# lite mode ($lite_mode = 1): clean up the program's raw data, i.e. its
# output file and the .xz/.gz variants
if [[ $lite_mode = 1 ]]; then
	rm -f $RESULT_ROOT/$program $RESULT_ROOT/$program.xz $RESULT_ROOT/$program.gz
fi

# ftrace has one stats file per $program.*.yaml in the result directory, each
# dumped under a stats group named after the file; every other program has
# its stats in $tmpfile.
case $program in
ftrace)
	for file in $RESULT_ROOT/$program.*.yaml; do
		stats_group=$(basename $file .yaml)
		dump_stat $stats_group $file $stat_file
	done
	;;
*)
	dump_stat $stats_group $tmpfile $stat_file
	;;
esac

# don't need to check empty json if save stat json to new place
if [[ -z $stat_file ]]; then
	check_empty_json
fi

# remove the unpacked copy of the log when a compressed one is present
if [ -f $log.gz ]; then
	rm $log
elif [ -f $log.xz ]; then
	rm $log -f
fi

# delete the temporarily extracted kmsg
if [ -f $kmsg_log.xz ]; then
	rm -f $kmsg_log
fi

# delete the temporarily extracted dmesg
if [ -f $dmesg_log.xz ]; then
	rm -f $dmesg_log
fi

rm -f $tmpfile

exit 0
